In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Client is the handle used to talk to the IPython parallel cluster.
# NOTE(review): later IPython releases ship this API as the separate
# `ipyparallel` package — confirm which one the target environment provides.
from IPython.parallel import Client
# Called with no arguments, so the library's default connection settings apply.
# NOTE(review): the cluster cell further down builds its own Client() rather
# than reusing `c` — confirm whether this instance is still needed.
c = Client()
Define the per-image processing function on every engine and in the local kernel (slow!)
In [2]:
%%px --local
import numpy as np
import pandas as pd
import os
from os import path
from skimage import io
from skimage.io import ImageCollection
from skimage.exposure.exposure import equalize_hist
from skimage.color.colorconv import rgb2gray
from skimage.feature.blob import blob_log
from skimage.feature.corner import corner_peaks, corner_harris
import cv2
# Root directory of the retina dataset; the paths below are all derived from it.
root_path = "/kaggle/retina"
# Training images, and the 'sample' sub-directory listed by the cluster cell.
train_path = path.join(root_path, 'train')
sample_train = path.join(train_path, 'sample')
# Training labels in CSV form; header=0 takes the column names from the first row.
labels_file = path.join(root_path, "trainLabels.csv")
# Read when this cell executes; process_single_image looks rows up by the
# 'image' column and reads the 'level' column.
labels = pd.read_csv(labels_file, header=0)
def get_image_name(file_name):
    """Return the base name of ``file_name`` without directory or extension.

    Example: ``'/data/train/457_left.jpeg'`` -> ``'457_left'``.

    Only the last extension is removed (``'a.tar.gz'`` -> ``'a.tar'``), and a
    path ending in a separator yields an empty string.
    """
    base_name = path.basename(file_name)
    stem, _extension = path.splitext(base_name)
    return stem
def process_single_image(file_name):
    """Extract blob and corner counts for one image and pair them with its label.

    The image is read from ``file_name``, converted to grayscale and
    histogram-equalized.  Blobs are detected with ``blob_log`` and corners with
    ``corner_peaks(corner_harris(...))`` on that equalized grayscale image.

    Parameters
    ----------
    file_name : str
        Path of the image file.  Its base name without extension is looked up
        in the ``image`` column of the module-level ``labels`` frame.

    Returns
    -------
    numpy.ndarray
        1-D array ``[number of blobs, number of corners, level]``.

    Raises
    ------
    IndexError
        If ``labels`` contains no row for this image.
    """
    image = io.imread(file_name)
    image_gray = equalize_hist(rgb2gray(image))
    blobs = blob_log(image_gray, max_sigma=30, threshold=.1)
    corners = corner_peaks(corner_harris(image_gray), min_distance=2)

    image_name = get_image_name(file_name)
    # One .loc lookup (row mask + column) instead of chained indexing.
    matching_levels = labels.loc[labels['image'] == image_name, 'level']
    if matching_levels.empty:
        # Same exception type the bare .iloc[0] produced, but with a message
        # that says which image is missing from the labels file.
        raise IndexError("no label found for image %r" % image_name)
    # If the image is listed more than once, the first match wins.
    level = matching_levels.iloc[0]

    return np.array([blobs.shape[0], corners.shape[0], level])
Test on a single file
In [13]:
# Run the extractor on one image in the local kernel.
# NOTE(review): this literal differs in letter case from root_path
# ("/kaggle/retina") and points at a 'raw' directory that the config cell does
# not define — confirm it resolves to the intended data on a case-sensitive
# filesystem.
in_path = path.normpath('/Kaggle/Retina/train/raw')
file_name = path.join(in_path, '457_left.jpeg')
# Bare last expression so the notebook displays the returned array.
process_single_image(file_name)
Out[13]:
Now on the cluster
In [ ]:
# View over all engines of the cluster; work submitted through it is spread
# across them.
dv = Client()[:]

# os.listdir returns entries in arbitrary order, so sort them: otherwise the
# "first half" selected below could be a different set of files on every run.
files = [path.join(sample_train, f) for f in sorted(os.listdir(sample_train))]
lenf = len(files)

# Process the first half of the sample.  Floor division keeps the slice bound
# an integer on Python 3 as well (a plain `/` would produce a float there).
asr = dv.map(process_single_image, files[:lenf // 2])

# Stack the per-image rows into one (n_images, 3) array with a single call,
# instead of re-copying the growing array once per image via reduce().
final = np.vstack(list(asr))

# NOTE: tofile writes raw values only — no shape or dtype is stored, so a
# reader has to supply both (e.g. np.fromfile(...).reshape(-1, 3)).
final.tofile('/users/boris/Dropbox/Kaggle/retina/Blobs_Corners_0.bin')